package com.offcn.bigdata.spark.p4

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Secondary sort: ordering records by more than one sort field.
  *
  * Counts the words of a small in-memory data set and prints each
  * (word, count) pair ordered by count descending and, when counts are
  * equal, by word ascending. The ordering itself is defined by
  * `KeyValue.compare`.
  */
object _02SecondSortOps {
    /**
      * Entry point: builds a local Spark context, runs the word count,
      * sorts the result and prints it on the driver.
      *
      * @param args command-line arguments (not used)
      */
    def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
            .setAppName(_02SecondSortOps.getClass.getSimpleName)
            .setMaster("local[*]")
        val sc = new SparkContext(conf)

        // Guarantee the context is released even when the job fails.
        try {
            val lines = sc.parallelize(List(
                "a is abstraction in spark is",
                "shared spark that abstraction be is in spark abstraction"
            ), 2)
            val pairs = lines.flatMap(_.split("\\s+")).map(word => (word, 1))
            val counts = pairs.reduceByKey(_ + _)

            // Sort by count descending; for equal counts, by word ascending.
            // The composite key carries both fields; the empty string is a
            // placeholder value so that sortByKey can be used.
            counts.map { case (word, count) => (KeyValue(word, count), "") }
                .sortByKey(ascending = true, numPartitions = 1)
                // Bring the sorted records to the driver before printing:
                // RDD.foreach would run println on the executors instead.
                .collect()
                .foreach { case (kv, _) => println(kv) }
        } finally {
            sc.stop()
        }
    }
}

/**
  * Composite sort key pairing a word with its occurrence count.
  *
  * Natural ordering: higher counts come first; keys with the same count are
  * ordered by `word` using `String.compareTo`.
  *
  * @param word  the word
  * @param count how many times the word occurred
  */
case class KeyValue(word: String, count: Int) extends Ordered[KeyValue] {
    /**
      * Compares this key with `that`.
      *
      * @param that the key to compare against
      * @return a negative value if this key sorts before `that`, a positive
      *         value if it sorts after, and 0 if both fields are equal
      */
    override def compare(that: KeyValue): Int =
        // Operands are swapped on purpose so that larger counts sort first.
        Integer.compare(that.count, count) match {
            case 0          => word.compareTo(that.word)
            case countOrder => countOrder
        }
}